\documentclass[unicode]{beamer}
\mode<presentation>
\usepackage{beamerthemesplit}
\usetheme{Malmoe}
\definecolor{mycolor}{RGB}{5,135,144}
\usecolortheme[named=mycolor]{structure}
%\usepackage[cp1251]{inputenc}
\usepackage[english]{babel}
\usepackage[T2A]{fontenc}
\usepackage{amssymb,amsfonts,amsmath}
\usepackage{array}
\usepackage{graphicx}
\usepackage{gastex}
\usepackage{wrapfig}
\usepackage{algorithm,algpseudocode}
\usepackage{tikz}
\usepackage[]{color}%,dvips
\usepackage{pstricks,pst-node,pst-text}%,pst-3d
\definecolor{dred}{rgb}{0.6, 0, 0}

\expandafter\def\expandafter\insertshorttitle\expandafter{%
  \insertshorttitle\hfill%
  \insertframenumber\,/\,\inserttotalframenumber}

\def\append{{\bf append}}
\def\iappend{{\bf iappend}}
\def\len{{\bf len}}
\def\nextPal{{\bf nextPal}}
\def\maxPal{{\bf maxPal}}
\def\Pal{{Pal}}
\algtext*{EndWhile}% Remove "end while" text
\algtext*{EndIf}% Remove "end if" text
\algtext*{EndFunction}% Remove "end function" text
\algtext*{EndProcedure}% Remove "end procedure" text
\algtext*{EndFor}% Remove "end for" text

\let\OldComment\Comment
\renewcommand{\Comment}[1]{\OldComment{\textcolor{gray}{#1}}}


\author[Dmitry Kosolobov]{Dmitry Kosolobov}
\title{Faster Lightweight Lempel--Ziv Parsing}

\institute[]{Ural Federal University\\Ekaterinburg, Russia}
\begin{document}
\date{ }
\maketitle

\section{Introduction}

\begin{frame}
\frametitle{Lempel--Ziv decomposition}
\visible<2->{
$w = u_1u_2\cdots u_k$ is the \emph{Lempel--Ziv decomposition} if for $i \in [1..k]$:
\begin{enumerate}
\item $u_i$ is a letter that does not appear in $u_1u_2\cdots u_{i-1}$ or
\item $u_i$ is the longest nonempty substring having at least two occurrences in $u_1u_2\cdots u_i$
\end{enumerate}
}
\visible<3->{$u_1, u_2, \ldots, u_k$ are the \emph{Lempel--Ziv factors}}
\visible<4->{
\begin{block}{Example}
$$
\begin{array}{l}
s = {\color<8,12,16,17,20,21>{red}a}{\color<8,12,16,17,20,21>{red}b}{\color<8,11,16,17,19>{red}a}{\color<11,19>{red}b}acab = {\color<5,14>{red}a}\cdot {\color<6,15>{red}b}\cdot {\color<7,8,16,17>{red}aba}\cdot {\color<9,18>{red}c}\cdot {\color<10,11,12,19,20,21>{red}ab}\\
\text{\visible<13->{output: }}{\visible<14->{a}}{\visible<15->{b}}{\visible<17->{(1,3)}}{\visible<18->{c}}{\visible<21->{(1,2)}}
\end{array}
$$
\end{block}
}
\end{frame}


\begin{frame}
\frametitle{Algorithms finding the Lempel--Ziv decomposition}
\pause
$n$ is the length of the input string over the integer alphabet $[0..\sigma{-}1]$\\
\pause
($n\log\sigma$ bits are for the readonly input string)\\
\pause
$\epsilon$ is a rational constant parameter\\
\pause
\begin{tabular}{|c|c|l|}
\hline
\small Time                       & \small Bits of space       &  \small Authors\\
\hline
$O(n)$                    & $O(n\log n)$         & \small many solutions\pause\\
$O(n\log\log\sigma)$ & $O(n\log\sigma)$ & \small Belazzougui and Puglisi 2015\pause\\
$O(n\log n\log\log\sigma)$ & $n\log\sigma+\epsilon n$ & \small K\"arkk\"ainen, Kempa, Puglisi 2013\pause\\
\hline
\hline
\textbf{$O(n(\log\sigma{+}\log\log n))$} & \textbf{$n\log\sigma + \epsilon n$} & \small \textbf{our result}\\
\hline
\end{tabular}
\end{frame}


\section{Algorithm}

\subsection{Overview}

\begin{frame}
\frametitle{Approach of K\"arkk\"ainen, Kempa, Puglisi}
\visible<2->{
\begin{figure}[htb]
\centering
\begin{picture}(100,18)
\gasset{Nframe=n,Nw=5,Nh=3,AHnb=0,linewidth=0.25}
\visible<8->{\put(72,8){\includegraphics[scale=0.32]{cards}}}
\visible<5,6>{\put(8,-2){\makebox(0,0)[cb]{$\underbrace{\phantom{aaaaaaaaaaaaaaa}}_b$}}}
\visible<5,6>{\put(37,-2){\makebox(0,0)[cb]{$\underbrace{\phantom{aaaaaaaaaaaaaaa}}_b$}}}
\visible<5->{\put(66,-2){\makebox(0,0)[cb]{$\underbrace{\phantom{aaaaaaaaaaaaaaa}}_b$}}}
\visible<5,6>{\put(95,-2){\makebox(0,0)[cb]{$\underbrace{\phantom{aaaaaaaaaaaaaaa}}_b$}}}
\visible<8->{\put(66,5){\makebox(0,0)[cb]{$\overbrace{\phantom{aaaaaaaaaaaaaaa}}^{O(\epsilon n)\text{ bits}}$}}}
\visible<9->{
\visible<10->{\put(65,20){\line(-6,-1){70}}}
\visible<11->{\put(65,20){\line(-5,-1){60}}}
\visible<12->{\put(65,20){\line(-4,-1){50}}}
\visible<13->{\put(65,20){\line(-3,-1){38}}}
\visible<14->{\put(65,20){\line(-2,-1){25}}}
\visible<15->{\put(65,20){\line(-1,-1){11}}}
\visible<16->{\put(65,20){\line(0,-1){7}}}
\visible<17->{\put(65,20){\line(1,-1){11}}}
}
\node(input)(-13,5){input:}
\node(text)(50,5){
 $\spadesuit\,\heartsuit\,\diamondsuit\,\heartsuit\,\spadesuit\,\spadesuit\,\clubsuit\,\diamondsuit%
\,\clubsuit\,\heartsuit\,\spadesuit\,\heartsuit\,\spadesuit\,\clubsuit\,\heartsuit\,\spadesuit%
\,\clubsuit\,\clubsuit\,\spadesuit\,\diamondsuit\,\spadesuit\,\heartsuit\,\heartsuit\,\diamondsuit%
\,\heartsuit\,\clubsuit\,\spadesuit\,\heartsuit\,\spadesuit\,\clubsuit\,\spadesuit\,\heartsuit$}
\end{picture}
\end{figure}
}
\vskip-3mm
\begin{itemize}
\item<3-> Report the Lempel--Ziv factors from left to right
\item<4-> Use $n\log\sigma + O(\epsilon n)$ bits of space (substitute $\epsilon' = c\epsilon$)
\item<5-> Split the input string on blocks of length $b$
\item<6-> Process each block as follows
\item<8-> Build a block index occupying $O(\epsilon n)$ bits
\item<9-> Scan the string from left to right up to the current block and find the Lempel--Ziv factors occurring in this block
\item<18-> Time: $O(\frac{n}b \times (\text{time for scanning and building of block index}))$
\end{itemize}
\end{frame}


\begin{frame}
\frametitle{Our modification}
\begin{itemize}
\item<2-> $b = \epsilon n / (\log\sigma + \log\log n)$
\item<3-> Block index occupies $O(b(\log\sigma + \log\log n)) = O(\epsilon n)$ bits%\includegraphics[scale=0.28]{hands}
\item<4-> Scanning and building of one block index take $O(n)$ time
\item<5-> The overall time is $O(\frac{n}b n) = O(n(\log\sigma + \log\log n))$
\end{itemize}
\visible<6->{\center{\huge{But!}}\\}
\visible<7->{
Our block index is a bit tricky
\vskip-5mm
\begin{figure}
\includegraphics[scale=0.28]{hands}
\end{figure}
}
\vskip-5mm
\visible<8->{Block index finds only the~Lempel--Ziv factors of length ${<}\frac{1}{\epsilon^2}\log^2 n$\\}
\visible<9->{Long factors ($\ge \frac{1}{\epsilon^2}\log^2 n$) must be processed separately}
\end{frame}


\subsection{Long factors}

\begin{frame}
\frametitle{Difference cover}
\visible<2->{$\tau = \lfloor\frac{1}{\epsilon}\log n\rfloor$; search the Lempel--Ziv factors of length ${\ge}\tau^2$}
\visible<3->{
\begin{figure}[htb]
\centering
\begin{picture}(100,10)
\gasset{Nframe=n,Nw=5,Nh=3,AHnb=0,linewidth=0.25}
\put(19.3,4){\makebox(0,0)[cb]{$\overbrace{\phantom{%
{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}}}^{\tau^2}$}}
\put(50.3,4){\makebox(0,0)[cb]{$\overbrace{\phantom{%
{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}}}^{\tau^2}$}}
\put(81.3,4){\makebox(0,0)[cb]{$\overbrace{\phantom{%
{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}}}^{\tau^2}$}}
\visible<4->{\put(8,   -1){\makebox(0,0)[cb]{$\underbrace{\phantom{{\circ}{\circ}{\circ}{\circ}}}_{\tau}$}}}
\visible<5->{\put(15.5,-1){\makebox(0,0)[cb]{$\underbrace{\phantom{{\circ}{\circ}{\circ}{\circ}}}_{\tau}$}}}
\visible<5->{\put(23,  -1){\makebox(0,0)[cb]{$\underbrace{\phantom{{\circ}{\circ}{\circ}{\circ}}}_{\tau}$}}}
\visible<5->{\put(30.5,-1){\makebox(0,0)[cb]{$\underbrace{\phantom{{\circ}{\circ}{\circ}{\circ}}}_{\tau}$}}}
\visible<4->{\put(39,-1){\makebox(0,0)[cb]{$\underbrace{\phantom{{\circ}{\circ}{\circ}{\circ}}}_{\tau}$}}}
\visible<5->{\put(46.5,  -1){\makebox(0,0)[cb]{$\underbrace{\phantom{{\circ}{\circ}{\circ}{\circ}}}_{\tau}$}}}
\visible<5->{\put(54,-1){\makebox(0,0)[cb]{$\underbrace{\phantom{{\circ}{\circ}{\circ}{\circ}}}_{\tau}$}}}
\visible<5->{\put(61.5,  -1){\makebox(0,0)[cb]{$\underbrace{\phantom{{\circ}{\circ}{\circ}{\circ}}}_{\tau}$}}}
\visible<4->{\put(70,  -1){\makebox(0,0)[cb]{$\underbrace{\phantom{{\circ}{\circ}{\circ}{\circ}}}_{\tau}$}}}
\visible<5->{\put(77.5,-1){\makebox(0,0)[cb]{$\underbrace{\phantom{{\circ}{\circ}{\circ}{\circ}}}_{\tau}$}}}
\visible<5->{\put(85,  -1){\makebox(0,0)[cb]{$\underbrace{\phantom{{\circ}{\circ}{\circ}{\circ}}}_{\tau}$}}}
\visible<5->{\put(92.5,-1){\makebox(0,0)[cb]{$\underbrace{\phantom{{\circ}{\circ}{\circ}{\circ}}}_{\tau}$}}}
\node(text)(50,5){
${\circ}{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}%
{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}%
{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}{\circ}$}
\visible<4->{\node(bul1)(7,5){${\bullet}{\bullet}{\bullet}{\bullet}$}}
\visible<4->{\node(bul2)(36.5,5){${\bullet}{\bullet}{\bullet}{\bullet}$}}
\visible<4->{\node(bul3)(66,5){${\bullet}{\bullet}{\bullet}{\bullet}$}}
\visible<5->{\node(bul11)(18.5,5){${\circ}{\circ}{\circ}{\bullet}{\circ}{\circ}{\circ}{\bullet}{\circ}{\circ}{\circ}{\bullet}$}}
\visible<5->{\node(bul22)(48,5){${\circ}{\circ}{\circ}{\bullet}{\circ}{\circ}{\circ}{\bullet}{\circ}{\circ}{\circ}{\bullet}$}}
\visible<5->{\node(bul33)(77.5,5){${\circ}{\circ}{\circ}{\bullet}{\circ}{\circ}{\circ}{\bullet}{\circ}{\circ}{\circ}{\bullet}$}}
\end{picture}
\end{figure}
}
\visible<6->{$O(\frac{n}{\tau})$ marked positions}
\visible<7->{
\begin{block}{Lemma}
For any $j, k{\ge}\tau^2$, there is $d{<}\tau^2$ such that $j{-}d$ and $k{-}d$ are marked.
\end{block}
}
\visible<8->{
\begin{figure}[htb]
\centering
\begin{picture}(100,10)
\gasset{Nframe=n,Nw=5,Nh=3,AHnb=0,linewidth=0.25}
\put(0,0){\includegraphics[scale=0.5]{line}}
\put(88.5,3){\makebox(0,0)[cb]{$\overbrace{\phantom{aaaaaaaaaaaaaaaaaa}}^{\text{Lempel--Ziv factor }u}$}}
\put(35.5,3){\makebox(0,0)[cb]{$\overbrace{\phantom{aaaaaaaaaaaaaaaaaa}}^{\text{occurrence of }u}$}}
\visible<9->{
\node(d1)(101.5,0){$\underbrace{\phantom{aaaa}}_{d < \tau^2}$}
\node(d2)(48.5,0){$\underbrace{\phantom{aaaa}}_{d < \tau^2}$}
\node(mark1)(97.5,2){$\bullet$}
\node(mark1)(44.5,2){$\bullet$}
\node(up1)(97.5,-1){$\uparrow$}
\node(up2)(44.5,-1){$\uparrow$}
\node(bar1)(97.5,-3){$|$}
\node(bar2)(44.5,-3){$|$}
\node(postext1)(98,-7){marked position}
\node(postext2)(45,-7){marked position}
}
\end{picture}
\end{figure}
}
\end{frame}

\begin{frame}
\frametitle{Long factors}
\visible<2->{
\begin{figure}
\begin{picture}(100,25)
\visible<2-11>{\put(2,25){\includegraphics[scale=0.5]{line2}}}
\visible<12-13>{\put(2,25){\includegraphics[scale=0.5]{line3}}}
\visible<14-20>{\put(2,25){\includegraphics[scale=0.5]{line4}}}
\visible<21-26>{\put(2,25){\includegraphics[scale=0.5]{line5}}}
\visible<27->{\put(2,25){\includegraphics[scale=0.5]{line6}}}
\visible<2-13>{\node(ui)(80,30){$u_i$}}
\visible<14-20>{\node(ui)(87,30){$u_i$}}
\visible<21->{\node(ui)(90,30){$u_i$}}
\visible<3->{\node(marks)(50,24.5){$\bullet\,\bullet\,\bullet\,\phantom{\bullet\,\bullet}\,\bullet\,\phantom{\bullet\,\bullet}\,\bullet%
\,\bullet\,\bullet\,\bullet\,\phantom{\bullet\,\bullet}\,\bullet\,\phantom{\bullet\,\bullet}\,\bullet%
\,\bullet\,\bullet\,\bullet\,\phantom{\bullet\,\bullet}\,\bullet\,\phantom{\bullet\,\bullet}\,\bullet%
\,\bullet\,\bullet\,\bullet\,\phantom{\bullet\,\bullet}\,\bullet$}}
\visible<5-7>{\put(-2.5,6.8){\includegraphics[scale=0.59]{prefixes}}}
\visible<8->{\put(-2.5,6.8){\includegraphics[scale=0.59]{prefixes2}}}
\visible<8-9>{\node(tau)(73.5,5){
$\underbrace{\phantom{\bullet\,\bullet\,\bullet\,\bullet\,\bullet\,\bullet\,\bullet\,\bullet\,\bullet}}_{\tau^2}$}}
\visible<4-26>{\node(tmark)(61,22){$t$}}
\visible<27->{\node(tmark)(68,22){$t$}}
\visible<12-26>{
\put(20,30){
\begin{tikzpicture}
\draw[<-] (0,0) -- (3.7,0);
\end{tikzpicture}
}}
\visible<14-20>{
\put(58,30){
\begin{tikzpicture}
\draw[->] (0,0) -- (1.6,0);
\end{tikzpicture}
}}
\visible<21-26>{
\put(58,30){
\begin{tikzpicture}
\draw[->] (0,0) -- (2.0,0);
\end{tikzpicture}
}}
\end{picture}
\end{figure}
}
\begin{minipage}{0.3\textwidth}\raggedleft
\visible<6->{
\vskip-10mm
\begin{figure}
\begin{picture}(40,20)
\visible<6>{\put(30,-12){\includegraphics[scale=0.4]{s}}}
\visible<7-12>{\put(29,-12){\includegraphics[scale=0.52]{s2}}}
\visible<13-24>{\put(29,-12){\includegraphics[scale=0.52]{s3}}}
\visible<25->{\put(29,-13){\includegraphics[scale=0.52]{s4}}}
\node(stext)(45,11){$S$}
\visible<9->{
\visible<9>{\put(-2,10){\includegraphics[scale=0.48]{t}}}
\visible<10-14>{\put(0,17){\includegraphics[scale=0.49]{t2}}}
\visible<15-19>{\put(0,17){\includegraphics[scale=0.49]{t3}}}
\visible<20-21>{\put(0,17){\includegraphics[scale=0.49]{t4}}}
\visible<22-24>{\put(0,17){\includegraphics[scale=0.49]{t5}}}
\visible<25->{\put(-2.5,18){\includegraphics[scale=0.49]{t6}}}
\node(stext)(0,25){$T$}
}
\visible<11-16>{\put(-2.5,-12){\includegraphics[scale=0.175]{grid}}}
\visible<17-22>{\put(-2.5,-12){\includegraphics[scale=0.175]{grid2}}}
\visible<23-23>{\put(-2.5,-12){\includegraphics[scale=0.175]{grid3}}}
\visible<24-24>{\put(-2.5,-12){\includegraphics[scale=0.175]{grid}}}
\visible<25-25>{\put(-4,-13){\includegraphics[scale=0.175]{grid4}}}
\visible<26->{\put(-4,-13){\includegraphics[scale=0.175]{grid5}}}
\end{picture}
\end{figure}
}
\end{minipage}
\hfill
\noindent\begin{minipage}{0.58\textwidth}\raggedright
\vskip-10mm
\begin{itemize}
\item<12->Find the shaded string in $S$
\item<14->Find another shaded string in $T$
\item<16->Perform orthogonal range search
\item<18->Extend $u_i$ on $\log_\sigma n$\visible<19->{, repeat}
\item<24->Include $t$ in the indexes $S$ and $T$
\item<26->Go to the next marked positions
\item<28->Time: $O(\frac{n}{\tau}(\frac{\tau^2}{\log_\sigma n}{+}\log n) +$ $\frac{n}{\log_\sigma n}\log n) = O(n\log\sigma)$
\item<29-> Space: $O(\frac{n}{\tau}\log n){=}O(\epsilon n)$ bits
\end{itemize}
\end{minipage}
\end{frame}



\begin{frame}
\frametitle{Open problems}
\begin{itemize}
\item<2-> $O(n)$ time in $O(n\log\sigma)$ bits of space?
\item<3-> $O(\frac{n}{\epsilon}\log\sigma)$ time in $n\log\sigma + \epsilon n$ bits of space?
\item<4-> $o(\frac{n}{\epsilon}\log\sigma)$ time in $n\log\sigma + \epsilon n$ bits of space? lower bounds?
\end{itemize}
\visible<5->{\center{\Huge{Thank you for your attention!}}
\begin{figure}
\includegraphics[scale=0.40]{hands}
\end{figure}}
\end{frame}

\end{document}
